Internet Analytics - Lab 2
Group: E
Names:
This is a template for part 3 of the lab. Clearly write your answers, comments and interpretations in Markdown cells. Don't forget that you can add $\LaTeX$ equations in these cells. Feel free to add or remove any cell.
Please properly comment your code. Code readability will be considered for grading. To avoid long cells of codes in the notebook, you can also embed long python functions and classes in a separate module. Don’t forget to hand in your module if that is the case. In multiple exercises, you are required to come up with your own method to solve various problems. Be creative and clearly motivate and explain your methods. Creativity and clarity will be considered for grading.
import epidemics_helper
import networkx as nx
import json
import numpy as np
import matplotlib.pyplot as plt
# Simulating an epidemic with the following parameters
# start node = 23654
# beta = 10
# gamma = 0.1
# Load the NYC road network from JSON and build an undirected graph from it.
with open('../data/nyc_augmented_network.json') as json_file:
    data = json.load(json_file)

G = nx.Graph()
G.add_edges_from((link['source'], link['target']) for link in data['links'])

# Node coordinates, used as positions for all spatial plots below.
coord = [node['coordinates'] for node in data['nodes']]

# Visualize the raw network
plt.figure(figsize=(10, 10))
nx.draw(G, pos=coord, with_labels=False, node_size=0.1)
plt.show()

# Run the SIR epidemic propagation through the graph
sir = epidemics_helper.SimulationSIR(G, beta=10.0, gamma=0.1)
sir.launch_epidemic(source=23654, max_time=100)
Epidemic stopped after 118.80 days | 118.80 days elapsed | 0.1% susceptible, 0.0% infected, 99.9% recovered
# infection_times is a numpy array of the time of infection of each node (the node id is the index)
# recover_times is a numpy array of the time of recovery of each node (the node id is the index)
infection_times = sir.inf_time
recover_times = sir.rec_time

n_nodes = len(G.nodes())
days = np.arange(100)

# For each day d: recovered = nodes with rec_time <= d; infected = nodes
# infected by day d minus those already recovered; the rest are susceptible.
recovered = np.array([np.count_nonzero(recover_times <= d) for d in days], dtype=float)
infected = np.array([np.count_nonzero(infection_times <= d) for d in days], dtype=float) - recovered
suceptible = n_nodes - infected - recovered

# Convert the raw counts into percentages of the whole population
recovered = recovered * 100 / n_nodes
infected = infected * 100 / n_nodes
suceptible = suceptible * 100 / n_nodes

# plotting the percentage of suceptible, infected and recovered nodes at each time step
plt.plot(recovered, label="recovered")
plt.plot(infected, label="infected")
plt.plot(suceptible, label="suceptible")
plt.xlabel("Time")
plt.ylabel("Percentage of nodes")
plt.legend()
plt.show()
# First day on which at least 60% of the population is infected simultaneously
first_day_60_inf = np.where(infected >= 60)[0][0]
print("Time at which 60% of the population is infected at the same time: day", first_day_60_inf)
# First day on which at least 60% has been infected or has recovered
first_day_60_ever = np.where(infected + recovered >= 60)[0][0]
print("Time at which 60% of the population is either infected or recovered: day", first_day_60_ever)
# First day on which at least 60% has recovered
first_day_60_rec = np.where(recovered >= 60)[0][0]
print("Time at which 60% of the population is recovered: day", first_day_60_rec)
Time at which 60% of the population is infected at the same time: day 7 Time at which 60% of the population is either infected or recovered: day 6 Time at which 60% of the population is recovered: day 15
def nodes_color(G, sir, day):
    """Return a display color per node of G for the given simulation day.

    Simulation status codes: 0 = susceptible (green), 1 = infected (red),
    anything else (recovered) = blue.
    """
    status_to_color = {0: "green", 1: "red"}
    return [status_to_color.get(sir.get_node_status(node, day), "blue")
            for node in G.nodes]
# Spatial snapshots of the epidemic at days 1, 3 and 30
snapshot_days = [1, 3, 30]
snapshot_colors = {day: nodes_color(G, sir, day) for day in snapshot_days}

for day in snapshot_days:
    plt.figure(figsize=(7, 7))
    nx.draw(G, pos=coord, with_labels=False, node_size=0.1,
            node_color=snapshot_colors[day])
    # legend: green = suceptible, red = infected, blue = recovered
    plt.legend(handles=[plt.scatter([], [], c="green", s=0.1),
                        plt.scatter([], [], c="red", s=0.1),
                        plt.scatter([], [], c="blue", s=0.1)],
               labels=["suceptible", "infected", "recovered"])
    plt.title("graph of new york city at day " + str(day) + " of epidemic")
plt.show()
# removing uniformly at random n edges from the graph
def rand_removed_edges(G, n):
    """Return a copy of G with n edges removed, sampled uniformly at random.

    :param G: a networkx graph
    :param n: the number of edges to remove
    :return: a new graph with exactly n distinct edges removed
    """
    # Sample edge indices WITHOUT replacement: the previous np.random.randint
    # call could draw the same index several times, silently removing fewer
    # than n edges from the copy.
    picked = np.random.choice(len(G.edges), size=n, replace=False)
    doomed = np.array(G.edges)[picked]
    H = G.copy()
    H.remove_edges_from(doomed)
    return H
# Helper method to print the S/I/R breakdown of a finished simulation at day 30
def simulation_statistics(G, sim, n, strategy):
    """
    :param G: the graph
    :param sim: the simulation
    :param n: the number of edges removed from the graph
    :param strategy: label of the edge-removal strategy used
    :return: prints the number of infected, recovered and suceptible nodes at day 30
    """
    # Status of every node at day 30 (0 = susceptible, 1 = infected, 2 = recovered)
    statuses = [sim.get_node_status(node, 30) for node in list(G.nodes)]
    total = len(statuses)
    suceptible30 = round(statuses.count(0) * 100 / total, 2)
    infected30 = round(statuses.count(1) * 100 / total, 2)
    recovered30 = round(statuses.count(2) * 100 / total, 2)
    print("Statistics for ", n , " edges removed using strategy: ", str(strategy))
    print("Suceptible nodes at day 30:", suceptible30, "%")
    print("Infected nodes at day 30:", infected30, "%")
    print("Recovered nodes at day 30:", recovered30, "%")
# Build two damaged networks: 1000 and 10000 edges removed uniformly at random
H1 = rand_removed_edges(G, 1000)
H2 = rand_removed_edges(G, 10000)

# Run one SIR simulation on each damaged network from a random source node
sir1 = epidemics_helper.SimulationSIR(H1, beta=10.0, gamma=0.1, verbose=False)
sir1.launch_epidemic(source=np.random.randint(0, high=len(G.nodes)), max_time=100)

sir2 = epidemics_helper.SimulationSIR(H2, beta=10.0, gamma=0.1, verbose=False)
sir2.launch_epidemic(source=np.random.randint(0, high=len(G.nodes)), max_time=100)

# Day-30 statistics for both random-removal experiments
simulation_statistics(H1, sir1, 1000, "random")
print("-----------------------------")
simulation_statistics(H2, sir2, 10000, "random")
Statistics for 1000 edges removed using strategy: random Suceptible nodes at day 30: 0.39 % Infected nodes at day 30: 8.73 % Recovered nodes at day 30: 90.88 % ----------------------------- Statistics for 10000 edges removed using strategy: random Suceptible nodes at day 30: 10.86 % Infected nodes at day 30: 22.79 % Recovered nodes at day 30: 66.35 %
def strategical_graph(G, n):
    """Remove the n "weakest" local bridges of G.

    Local bridges are sorted by the mean degree of their two endpoints and
    the n lowest-scoring ones are removed from a copy of the graph.

    :param G: a networkx graph
    :param n: the number of edges to remove
    :return: a new graph with n edges removed following the strategy described above
    """
    # create a copy of the graph so G is left untouched
    G_copy = G.copy()
    # find local bridges (edges whose endpoints share no other common neighbor)
    local_bridges = list(nx.local_bridges(G_copy))
    # Sort by the MEAN endpoint degree. The original key,
    # G.degree(x[0]) + G.degree(x[1]) / 2, only divided the second degree
    # because of operator precedence; parenthesizing gives the intended average.
    local_bridges.sort(key=lambda e: (G.degree(e[0]) + G.degree(e[1])) / 2)
    bridges_to_remove = local_bridges[:n]
    G_copy.remove_edges_from(bridges_to_remove)
    return G_copy
# New graph with 2500 edges removed using the strategical method
H3 = strategical_graph(G, 2500)
# Fresh SIR model on the pruned graph
sir4 = epidemics_helper.SimulationSIR(H3, beta=10.0, gamma=0.1, verbose=False)

# Side-by-side: the original network vs. the network without its local bridges
for graph, title in ((G, "original graph of new york city"),
                     (H3, "graph of new york city with 2500 local bridges removed")):
    plt.figure(figsize=(7, 7))
    nx.draw(graph, pos=coord, with_labels=False, node_size=0.1)
    plt.title(title)
plt.show()
# helper method: average day-30 susceptible share over 100 random-source runs
def simulation_statistics2(G, sim, n):
    """
    :param G: the graph
    :param sim: the simulation
    :param n: the number of edges removed from the graph
    :return: prints the average percentage of suceptible nodes at day 30
    """
    n_nodes = len(G.nodes())
    suceptibleG = np.zeros(100)
    # Repeat the epidemic 100 times from a random source node and record the
    # share of still-susceptible nodes at day 30 of each run.
    for run in range(100):
        sim.launch_epidemic(source=np.random.randint(0, high=len(G.nodes) - 1), max_time=100)
        day30 = [sim.get_node_status(node, 30) for node in list(G.nodes())]
        suceptibleG[run] = day30.count(0) * 100 / n_nodes
    # report the averaged result
    print("Statistics for the graph with ", n, " edges removed based on local bridges and node degree")
    print("Average number of suceptible nodes at day 30: ", np.mean(suceptibleG))
# Now we run the simulation for 100 times starting at random nodes
# (averaged day-30 susceptible share for the 2500-bridge-removal strategy)
simulation_statistics2(H3, sir4, 2500)
Statistics for the graph with 2500 edges removed based on local bridges and node degree Average number of suceptible nodes at day 30: 72.89751142328463